home *** CD-ROM | disk | FTP | other *** search
/ Almathera Ten Pack 3: CDPD 3 / Almathera Ten on Ten - Disc 3: CDPD3.iso / scope / 101-125 / scopedisk106 / bbs-index / src / expression.c < prev    next >
Encoding:
C/C++ Source or Header  |  1995-03-19  |  18.1 KB  |  520 lines

  1. /*
  2.  * EXPRESSION.C
  3.  *
  4.  * This module contains the routines needed to parse and evaluate an
  5.  * expression. This is used while selecting which files to print.
  6.  *
  7.  *     This is the BNF for the set of expressions recognised by the
  8.  *     database searcher. It doesn't contain any semantic information however.
  9.  *
  10.  *     Expression                      ::= Boolean | Boolean Operator Expression | ALL
  11.  *     Operator                        ::= AND | OR 
  12.  *     Boolean                         ::= NOT Boolean                         |
  13.  *                                                 '(' Expression ')'          |
  14.  *                                                     BoolIdent                               |
  15.  *                                                     NumIdent        Op Value        |
  16.  *                                                     StringIdent Op String   |
  17.  *                                                     DateIdent       Op Date
  18.  *     Op                                      ::= '<' | '>' | '<=' | '>=' | '=' | '<>'
  19.  *     BoolIdent                       ::= BINARY | ONLINE | LOCAL | VALID
  20.  *     StringIdent                     ::= COMMENT | DISKNAME | NAME | OWNER | PATHNAME
  21.  *     NumIdent                        ::= ACCESS | SECTION | DIRECTORY | DISKDIRNUM |
  22.  *                                                     SIZE | KSIZE
  23.  *     DateIdent                       ::= DATE
  24.  *     String                          ::= (Text enclosed in quotes)
  25.  *     Value                           ::= (A number)
  26.  *     Date                            ::= (A date in the format "dd-mon-yy")
  27.  *
  28.  */
  29.  
  30. #ifndef LATTICE_50
  31. #include "system.h"
  32. #endif
  33.  
  34. #include "bbsindex.h"
  35.  
  36. #define mystrcmp stricmp               /* Make match() insensitive to case */
  37.  
  38. /*
  39.  *             BNF procedures
  40.  */
  41.  
  42. void bnf_op(), bnf_date(), bnf_expression(), bnf_boolean();
  43.  
  44. /*
  45.  *             Global variable(s)
  46.  */
  47.  
  48. static int treepos;                            /* Next free entry in tree array        */
  49. static int curtoken;                   /* Current token                                        */
  50. static int curvalue;                   /* Current number with E_NUMBER         */
  51. static char curstring[MAXCOM]; /* Current string with E_STRING         */
  52.  
  53. /*
  54.  *             Tokens recognised as special in expressions
  55.  */
  56.  
  57. struct {
  58.        int tag;
  59.        char *name;
  60. } tokens[] = {
  61.        
  62.        {       E_EQ,                   "="                     },
  63.        {       E_NE,                   "<>"            },
  64.        {       E_LE,                   "<="            },
  65.        {       E_GE,                   ">="            },
  66.        {       E_LT,                   "<"                     },
  67.        {       E_GT,                   ">"                     },
  68.        {       E_AND,                  "AND"           },
  69.        {       E_OR,                   "OR"            },
  70.        {       E_NOT,                  "NOT"           },
  71.        {       E_ALL,                  "ALL"           },
  72.        {       E_OPENPAR,              "("                     },
  73.        {       E_CLOSEPAR,             ")"                     },
  74.        {       E_TEXT,                 "TEXT"          },
  75.        {       E_REMOTE,               "REMOTE"        },
  76.        {       E_INVALID,              "INVALID"       },
  77.        {       E_OFFLINE,              "OFFLINE"       },
  78.        {       NULL,                   NULL            }
  79. };
  80.  
  81. /*
  82.  *             wild()
  83.  *             ------
  84.  *             This routine does a wildcard match on the two specified strings.
  85.  *             The first string contains the string to check, and the second string
  86.  *             containts the wildcard pattern to check against. The special wild
  87.  *             card characters are '?' which matches any single characters, and
  88.  *             '*' which matches any number of characters. 0 is returned if the
  89.  *             the two strings match, else a +ve or -ve number.
  90.  *
  91.  */
  92. int wild(s,w)
  93. char *s;
  94. char *w;
  95. {
  96.        char *p;
  97.        char ch;
  98.  
  99.        while (*w && *s) {
  100.                switch (*w) {
  101.  
  102.                        case '*':
  103.                                ch = toupper(w[1]);
  104.                                if (!ch)
  105.                                        return (0);
  106.                                for (p = s; *p; p++) {
  107.                                        if (toupper(*p) == ch || ch == '?') {
  108.                                                if (!wild(p,w+1))
  109.                                                        return (0);
  110.                                        }
  111.                                }
  112.                                return (toupper(*p)-ch);
  113.  
  114.                        case '?':
  115.                                break;
  116.  
  117.                        default:
  118.                                if (toupper(*s) != toupper(*w))
  119.                                        return (toupper(*s)-toupper(*w));
  120.                }
  121.                w++;
  122.                s++;
  123.        }
  124.        return (toupper(*s)-toupper(*w));
  125. }
  126.  
  127.  
  128. /*
  129.  *             Dirty great macro to compare two values according to an operator
  130.  */
  131. #define e_cmp(v1,op,v2)                                                \
  132.        switch (op) {                                                   \
  133.                case E_EQ: return ((v1) == (v2));       \
  134.                case E_NE: return ((v1) != (v2));       \
  135.                case E_LT: return ((v1) <  (v2));       \
  136.                case E_GT: return ((v1) >  (v2));       \
  137.                case E_LE: return ((v1) <= (v2));       \
  138.                case E_GE: return ((v1) >= (v2));       \
  139.        }
  140.  
  141. #define e_numcmp(n)            e_cmp(n, e->op, e->num)
  142. #define e_strcmp(s)            e_cmp(mystrcmp(s,e->text), e->op, 0)
  143. #define e_wildcmp(s)   e_cmp(wild(s,e->text), e->op, 0)
  144.  
  145. /*
  146.  *             match()
  147.  *             -------
  148.  *             Scans the the parse tree, and returns TRUE if the current
  149.  *             record matches the criteria in the tree, else FALSE.
  150.  *
  151.  *             Aside: Aren't C macros wonderful? Just think how long this function
  152.  *             would be if it wasn't for the above e_cmp, e_numcmp and e_strcmp
  153.  *             macros.
  154.  */
  155. int match(p, e)
  156. UDHEAD *p;
  157. EXPR *e;
  158. {
  159.        switch (e->field) {
  160.                
  161.                case E_AND:                     return (match(p, e->left) && match(p, e->right));
  162.                case E_OR:                      return (match(p, e->left) || match(p, e->right));
  163.                case E_NOT:                     return (!match(p, e->left));
  164.                case E_ALL:                     return (TRUE);
  165.                case I_LOCAL:           return ((int)p->local);
  166.                case I_BINARY:          return ((int)p->bin);
  167.                case I_VALID:           return ((int)p->valid);
  168.                case I_ONLINE:          return ((int)p->online);
  169.                case E_REMOTE:          return (!(int)p->local);
  170.                case E_TEXT:            return (!(int)p->bin);
  171.                case E_INVALID:         return (!(int)p->valid);
  172.                case E_OFFLINE:         return (!(int)p->online);
  173.                case I_ACCESS:          e_numcmp(p->accesses);
  174.                case I_DATE:            e_numcmp(p->date);
  175.                case I_DIRECTORY:       e_numcmp(p->dir);
  176.                case I_DISKDIRNUM:      e_numcmp(p->dirnum);
  177.                case I_KSIZE:           e_numcmp(BTOK(p->length));
  178.                case I_SECTION:         e_numcmp(p->section);
  179.                case I_SIZE:            e_numcmp(p->length);
  180.                case I_DISKNAME:        e_strcmp(p->disk_name);
  181.                case I_COMMENT:         e_strcmp(p->desc);
  182.                case I_NAME:            e_strcmp(p->cat_name);
  183.                case I_OWNER:           e_strcmp(p->owner);
  184.                case W_DISKNAME:        e_wildcmp(p->disk_name);
  185.                case W_COMMENT:         e_wildcmp(p->desc);
  186.                case W_NAME:            e_wildcmp(p->cat_name);
  187.                case W_OWNER:           e_wildcmp(p->owner);
  188.        }
  189. }
  190.  
  191. /*
  192.  *             newnode()
  193.  *             ---------
  194.  *             This function allocates a new node from the tree array, and returns
  195.  *             a pointer to it. If overflow occurs, it aborts with an error message.
  196.  */
  197.  
  198. EXPR *newnode()
  199. {
  200.        treepos++;
  201.        if (treepos >= MAXEXPR) {
  202.                scripterror("expression to complex\n");
  203.                Cleanup(10);
  204.        }
  205.        return (&tree[treepos]);
  206. }
  207.  
  208. /*
  209.  *             readtoken()
  210.  *             -----------
  211.  *             This function reads the next token from the command line, starting
  212.  *             at position curpos. curtoken is set to the type of token received.
  213.  *             If it was E_STRING, then curstring points to the string it
  214.  *             represents (without the quotes). If it was E_NUM, then curvalue
  215.  *             points to the number represented. If the end of the line is
  216.  *             reached, E_END is automatically set.
  217.  */
  218.  
  219. #define nextch()               (ch = combuf[compos++])
  220. #define ungetnext()            (compos--)
  221.  
  222. void readtoken()
  223. {
  224.        char *p = curstring, ch;
  225.        int i;
  226.  
  227.        if (compos >= comlen) {
  228.                curtoken = E_END;
  229.                return;
  230.        }
  231.  
  232.        do {
  233.                nextch();
  234.        } while (ch == CHAR_SPACE);
  235.  
  236.        if (ch == CHAR_QUOTES) {                        /* Handle string in quotes */
  237.                curtoken = E_STRING;
  238.                nextch();
  239.                if (*p == CHAR_NULL) {
  240.                        curtoken = E_END;
  241.                        return;
  242.                }
  243.                do {
  244.                        *p++ = ch;
  245.                        nextch();
  246.                } while (ch && ch != CHAR_QUOTES);
  247.                *p = CHAR_NULL;
  248.                return;
  249.        }
  250.  
  251.        if (isdigit(ch)) {                              /* Numeric value? */
  252.                curtoken = E_NUMBER;
  253.                curvalue = 0;
  254.                do {
  255.                        curvalue = curvalue * 10 + (ch - '0');
  256.                        nextch();
  257.                } while (isdigit(ch));
  258.                ungetnext();
  259.                return;
  260.        }
  261.  
  262.        if (isalpha(ch)) {                              /* Alphabetic identifier? */
  263.                do {
  264.                        *p++ = ch;
  265.                        nextch();
  266.                } while (isalpha(ch));
  267.        } else if (ch == CHAR_NULL) {   /* At end of line? */
  268.                curtoken = E_END;
  269.                return;
  270.        } else {                                                /* Must be punctuation */
  271.                do {
  272.                        *p++ = ch;
  273.                        nextch();
  274.                } while (ch && !isalpha(ch) && !isdigit(ch) && ch != CHAR_SPACE
  275.                                                                        && ch != CHAR_QUOTES);
  276.        }
  277.        *p = CHAR_NULL;
  278.        ungetnext();
  279.  
  280.        /* Check for character A - F */
  281.        if (curstring[1] == CHAR_NULL) {
  282.                switch (curstring[0]) {
  283.                        case 'A': case 'B': case 'C':
  284.                        case 'D': case 'E': case 'F':
  285.                                curvalue = curstring[0] + 10 - 'A';
  286.                                curtoken = E_NUMBER;
  287.                                return;
  288.                }
  289.        }
  290.  
  291.        /* Now find token that matches string */
  292.        for (i = 0; i < MAXINDEX; i++) {
  293.                if (!strcmp(curstring, indexes[i].name)) {
  294.                        curtoken = indexes[i].tag;
  295.                        return;
  296.                }
  297.        }
  298.        for (i = 0; tokens[i].name; i++) {
  299.                if (!strcmp(curstring, tokens[i].name)) {
  300.                        curtoken = tokens[i].tag;
  301.                        return;
  302.                }
  303.        }
  304.        scripterror("unrecognised symbol '");
  305.        print2(curstring,"' in expression\n");
  306.        Cleanup(10);
  307. }
  308.  
  309.  
  310. /*
  311.  *             com_select()
  312.  *             ------------
  313.  *             This command parses the expression in the command buffer starting
  314.  *             at position compos, and builds an expression tree representing it.
  315.  *             This tree is stored in tree[], starting at element 0. This expression
  316.  *             tree is used when match() is called.
  317.  */
  318.  
  319. void com_select()
  320. {
  321.        treepos = 0;
  322.        readtoken();
  323.        bnf_expression(tree);
  324. }
  325.  
  326. /*
  327.  *             bnf_expression()
  328.  *             ----------------
  329.  *             Parses the BNF line:
  330.  *
  331.  *             Expression ::= Boolean | Boolean Operator Expression | ALL
  332.  *
  333.  */
  334. void bnf_expression(e)
  335. EXPR *e;
  336. {
  337.        EXPR dummy;
  338.  
  339.        if (curtoken == E_ALL)
  340.                e->field = E_ALL;
  341.        else {
  342.                bnf_boolean(&dummy);
  343.                if (curtoken == E_AND || curtoken == E_OR) {
  344.                        e->field   = curtoken;
  345.                        e->left    = newnode();
  346.                        e->right   = newnode();
  347.                        *(e->left) = dummy;
  348.                        readtoken();
  349.                        bnf_expression(e->right);
  350.                } else
  351.                        *e = dummy;
  352.        }
  353. }
  354.  
  355. /*
  356.  *             bnf_boolean()
  357.  *             -------------
  358.  *             Parses the following BNF line:
  359.  *
  360.  *             Boolean ::= NOT Boolean                         |
  361.  *                                 '(' Expression ')'          |
  362.  *                                     BoolIdent                               |
  363.  *                                     NumIdent        Op Value        |
  364.  *                                     StringIdent Op String   |
  365.  *                                     DateIdent       Op Date
  366.  */
  367. void bnf_boolean(e)
  368. EXPR *e;
  369. {
  370.        char *p;
  371.  
  372.        e->field = curtoken;
  373.        switch (curtoken) {
  374.  
  375.                case E_NOT:
  376.                        e->left = newnode();
  377.                        readtoken();
  378.                        bnf_boolean(e->left);
  379.                        break;
  380.  
  381.                case E_OPENPAR:
  382.                        readtoken();
  383.                        bnf_expression(e);
  384.                        if (curtoken != E_CLOSEPAR) {
  385.                                scripterror("missing close parenthesis\n");
  386.                                Cleanup(10);
  387.                        }
  388.                        readtoken();
  389.                        break;
  390.  
  391.                case I_BINARY:
  392.                case I_VALID:
  393.                case I_ONLINE:
  394.                case I_LOCAL:
  395.                case E_TEXT:
  396.                case E_INVALID:
  397.                case E_OFFLINE:
  398.                case E_REMOTE:
  399.                        readtoken();
  400.                        break;
  401.  
  402.                case I_ACCESS:
  403.                case I_SECTION:
  404.                case I_DIRECTORY:
  405.                case I_DISKDIRNUM:
  406.                case I_SIZE:
  407.                case I_KSIZE:
  408.                        readtoken();
  409.                        bnf_op(e);
  410.                        if (curtoken != E_NUMBER) {
  411.                                scripterror("number expected in expression\n");
  412.                                Cleanup(10);
  413.                        }
  414.                        e->num = curvalue;
  415.                        readtoken();
  416.                        break;
  417.  
  418.                case I_COMMENT:
  419.                case I_DISKNAME:
  420.                case I_NAME:
  421.                case I_OWNER:
  422.                case I_PATHNAME:
  423.                        readtoken();
  424.                        bnf_op(e);
  425.                        if (curtoken != E_STRING) {
  426.                                scripterror("string expected in expression\n");
  427.                                Cleanup(10);
  428.                        }
  429.                        e->text = mymalloc(strlen(curstring)+1);
  430.                        strcpy(e->text, curstring);
  431.                        /*
  432.                         *              Now check the string, and see if it contains any wildcard
  433.                         *              characters. If it does, use operation W_{string}
  434.                         *              instead of E_{string}. If it doesn't, use the standard
  435.                         *              strcmp which is much faster.
  436.                         */
  437.                        for (p = curstring; *p; *p++) {
  438.                                if (*p == '*' || *p == '?') {
  439.                                        e->field = tokentowild(e->field);
  440.                                        break;
  441.                                }
  442.                        }
  443.                        readtoken();
  444.                        break;
  445.  
  446.                case I_DATE:
  447.                        readtoken();
  448.                        bnf_op(e);
  449.                        bnf_date(e);
  450.                        break;
  451.  
  452.                default:
  453.                        scripterror("error in expression\n");
  454.                        Cleanup(10);
  455.        }
  456. }
  457.  
  458. /*
  459.  *             bnf_op
  460.  *             ------
  461.  *             Sets the operation field of the specified expression to the value
  462.  *             of the next token on the command line.
  463.  */
  464. void bnf_op(e)
  465. EXPR *e;
  466. {
  467.        switch (curtoken) {
  468.                case E_EQ:
  469.                case E_NE:
  470.                case E_LT:
  471.                case E_GT:
  472.                case E_LE:
  473.                case E_GE:
  474.                        e->op = curtoken;
  475.                        readtoken();
  476.                        break;
  477.  
  478.                default:
  479.                        scripterror("expected comparison operator\n");
  480.                        Cleanup(10);
  481.        }
  482. }
  483.  
  484. /*
  485.  *             bnf_date()
  486.  *             -----------
  487.  *             Reads a date from the command line, validates it, and sets the
  488.  *             'num' field in the expression to its numerical value. The date
  489.  *             is in the format "dd-mon-yy", e.g. "15-Jun-89"
  490.  */
  491. void bnf_date(e)
  492. EXPR *e;
  493. {
  494.        int day, month, year;
  495.  
  496.        if (curtoken != E_STRING) {
  497.                scripterror("expected date string in expression\n");
  498.                Cleanup(10);
  499.        }
  500.  
  501.        if (strlen(curstring) != 9 ||
  502.                        curstring[2] != '-' || curstring[6] != '-') {
  503.                scripterror("Invalid date format (should be dd-mon-yy)\n");
  504.                Cleanup(10);
  505.        }
  506.  
  507.        day = atoi(curstring);
  508.        year = atoi(curstring+7);
  509.        for (month = 0; month < 12 && strnicmp(curstring+3, months[month], 3);
  510.                                                                                                                month++)
  511.                ;
  512.        if ((day < 1 || day > 31) || (month < 1 || month > 12) ||
  513.                                (year < 1 || year > 99)) {
  514.                scripterror("Invalid date format (should be dd-mon-yy)\n");
  515.                Cleanup(10);
  516.        }
  517.        e->num = (((year * 13) + month) << 5) + day;
  518.        readtoken();
  519. }
  520.